# Chunk defaults applied to every knitr code chunk in this report:
# echo the code, hide warnings and messages, use 10 x 10 figures and
# emit chunk results "asis".
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  fig.width = 10,
  fig.height = 10,
  results = "asis"
)
# NOTE(review): a width of 12 wraps printed output after 12 characters;
# possibly meant to be 120 -- confirm before changing.
options(width = 12)

## Install or load required packages
library(dplyr)
library(tibble)
library(DT)
library(data.table)

library(ggplot2)
# Default ggplot2 theme for the session: black-and-white base theme with
# the spacing between facet panels set to zero lines.
theme_set(
  theme_bw() +
    theme(panel.spacing = grid::unit(0, "lines"))
)

library(scales)
library(gifski)
library(gganimate)

## Source helper functions from funs dir
source("funs/globalFuns.R")

About the data

This dataset contains five-year survival rates for patients with various cancer types in the US from \(1963\) to \(2013\). The data were published by the US National Cancer Institute and downloaded from Data World.

Aim

Download and load data

## Create the "HW6" output directory under the current directory
## (createDir is a project helper sourced from funs/globalFuns.R)
createDir(dirname = "HW6", dirpath = ".")

## Load the survival-rate CSV through the project helper downloadDf.
## NOTE(review): the helper presumably reuses a local copy when one already
## exists and downloads from `url` otherwise -- confirm in funs/globalFuns.R.
url <- "https://query.data.world/s/idhkwmkwynt4n4jlgmkslf43c5pd24"
raw_df <- downloadDf(
  filename = "cancer_survival_rates_usa",
  filetype = "csv",
  df_url = url
)

Reading dataset from your computer… cancer_survival_rates_usa.csv dataset already saved!!!

Variables

# Build the variable summary table: Year is converted to a factor before
# being handed to the project helper summarizeDf(), and the "Summary"
# column is relabelled to describe the format of its contents.
df_summary <- raw_df %>%
  mutate(Year = as.factor(Year)) %>%
  summarizeDf() %>%
  setnames("Summary", "Summary ([min, max]; mean (sd) / label(%))")

# Show the summary as an interactive table without row names.
datatable(df_summary, rownames = FALSE)

Cleaning

# Full cleaned table (used for the overall survival plot): drop rows with
# a missing survival rate, then rename every column to lower case with
# dots replaced by underscores (e.g. Survival.Rate -> survival_rate).
raw_df <- raw_df %>%
  filter(!is.na(Survival.Rate))
raw_df <- setnames(
  raw_df,
  names(raw_df),
  tolower(gsub("\\.", "_", names(raw_df)))
)

# Subset for the race/gender plot: remove every row whose race starts with
# "All", whose cancer type contains "All", or whose gender contains "total"
# (presumably the pre-aggregated rows -- verify against the data).
working_df <- raw_df %>%
  filter(
    !(grepl("^All", race) | grepl("All", cancer_type) | grepl("total", gender))
  )

Visualization

Overall Cancer Survival

# Data for the overall plot: keep rows whose race starts with "All" and
# whose gender contains "total", excluding cancer types that start with
# "All". Within each year, rank cancer types by descending survival rate
# (rank 1 = highest; ties share the same rank because min_rank is used).
# The grouping is dropped afterwards so that later operations on
# overall_df are not silently performed per year.
overall_df <- raw_df %>%
  filter(
    grepl("^All", race),
    grepl("total", gender),
    !grepl("^All", cancer_type)
  ) %>%
  group_by(year) %>%
  mutate(ranking = min_rank(-survival_rate)) %>%
  ungroup()

# Animated horizontal bar chart of survival rate by cancer type, one state
# per year. Bars are drawn with geom_tile (centred at survival_rate / 2 with
# height survival_rate, so each bar spans 0..survival_rate) and positioned
# by the per-year ranking, which lets bars change places between years.
plot1 <- ggplot(
  overall_df,
  aes(ranking, group = cancer_type, colour = cancer_type, fill = cancer_type)
) +
  # Constant settings (width, colour, hjust, vjust) are passed as layer
  # parameters rather than mapped inside aes().
  geom_tile(
    aes(y = survival_rate / 2, height = survival_rate),
    width = 0.9,
    colour = NA
  ) +
  # Cancer type label, right-aligned against the start of each bar.
  geom_text(
    aes(y = 0, label = paste0(cancer_type, " ")),
    vjust = 0.2,
    hjust = 1
  ) +
  # Survival rate label, left-aligned at the end of each bar.
  geom_text(
    aes(y = survival_rate, label = paste0(" ", percent(survival_rate))),
    hjust = 0
  ) +
  # clip = "off" lets the labels extend beyond the panel area.
  coord_flip(clip = "off", expand = TRUE) +
  # Reverse the ranking axis so that rank 1 is drawn at the top.
  scale_x_reverse() +
  scale_color_viridis_d(name = "") +
  scale_fill_viridis_d(name = "") +
  # "none" replaces the deprecated `FALSE` for suppressing legends.
  guides(colour = "none", fill = "none") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", colour = "grey"),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    # Wide left margin leaves room for the cancer type labels.
    plot.margin = margin(1, 1, 1, 4, "cm")
  ) +
  transition_states(states = year, transition_length = 4, state_length = 1) +
  ease_aes("cubic-in-out") +
  # {closest_state} is filled in by gganimate with the current year.
  labs(
    title = "Cancer survival rate per Year : {closest_state}",
    x = "",
    y = "Survival rate"
  )
# Render the animation once and keep the rendered object. Passing the
# unrendered plot to anim_save() would make it render a second time with
# default settings, so the saved GIF would not use the frame rate, size
# and end pause chosen here.
plot1_anim <- animate(
  plot1,
  nframes = 100,
  fps = 20,
  width = 950,
  height = 750,
  end_pause = 10
)
# Display the rendered animation in the report.
plot1_anim

# Save exactly the animation that was rendered above.
anim_save("csurvival_overall.gif", animation = plot1_anim)

Cancer Survival by Gender and Race

# Animated line chart of survival rate over time, one line per cancer type,
# faceted by gender (rows) and race (columns). transition_reveal(year)
# reveals the data progressively along the year axis.
plot2 <- ggplot(
  working_df,
  aes(x = year, y = survival_rate, colour = cancer_type, group = cancer_type)
) +
  geom_line() +
  geom_point() +
  # x and y are inherited from the plot-level mapping; only the label is added.
  geom_text(aes(label = cancer_type)) +
  scale_x_continuous(breaks = seq(1963, 2013, 10)) +
  scale_y_continuous(labels = percent) +
  scale_color_viridis_d(name = "") +
  facet_grid(gender ~ race) +
  # The legend is hidden; the text layer labels each cancer type instead.
  theme(legend.position = "none") +
  transition_reveal(year)
# Render the animation once and keep the rendered object. Passing the
# unrendered plot to anim_save() would make it render a second time with
# default settings, so the saved GIF would not use the frame rate, size
# and end pause chosen here.
plot2_anim <- animate(
  plot2,
  nframes = 100,
  fps = 20,
  width = 950,
  height = 750,
  end_pause = 10
)
# Display the rendered animation in the report.
plot2_anim

# Save exactly the animation that was rendered above.
anim_save("csurvival_racegender.gif", animation = plot2_anim)